{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.autograd import Variable\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "import sys\n",
    "sys.path.insert(0, \"lib/\")\n",
    "from data.coco_dataset import CocoDataset\n",
    "from utils.preprocess_sample import preprocess_sample\n",
    "from utils.collate_custom import collate_custom\n",
    "from utils.utils import to_cuda_variable\n",
    "import utils.result_utils as result_utils\n",
    "from utils.json_dataset_evaluator import evaluate_boxes,evaluate_masks\n",
    "from model.detector import detector\n",
    "from utils.blob import get_rois_blob\n",
    "from utils.multilevel_rois import add_multilevel_rois_for_test\n",
    "\n",
    "torch_ver = torch.__version__[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Pretrained model\n",
    "# # https://s3-us-west-2.amazonaws.com/detectron/35859007/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_2x.yaml.01_49_07.By8nQcCH/output/train/coco_2014_train%3Acoco_2014_valminusminival/generalized_rcnn/model_final.pkl\n",
    "# arch='resnet50'\n",
    "# pretrained_model_file = 'files/trained_models/mask/e2e_mask_rcnn_R-50-FPN_2x.pkl'\n",
    "\n",
    "# https://s3-us-west-2.amazonaws.com/detectron/35861858/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml.02_32_51.SgT4y1cO/output/train/coco_2014_train:coco_2014_valminusminival/generalized_rcnn/model_final.pkl\n",
    "arch='resnet101'\n",
    "pretrained_model_file = 'files/trained_models/mask/e2e_mask_rcnn_R-101-FPN_2x.pkl'\n",
    "\n",
    "# COCO minival2014 dataset path\n",
    "coco_ann_file='datasets/data/coco/annotations/instances_minival2014.json'\n",
    "img_dir='datasets/data/coco/val2014'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading annotations into memory...\n",
      "Done (t=0.62s)\n",
      "creating index...\n",
      "index created!\n"
     ]
    }
   ],
   "source": [
    "dataset = CocoDataset(ann_file=coco_ann_file,img_dir=img_dir,\n",
    "                       sample_transform=preprocess_sample(target_sizes=[800],fpn_on=True))\n",
    "dataloader = DataLoader(dataset, batch_size=1, # only batch_size=1 is supported by now\n",
    "                        shuffle=False, num_workers=0, collate_fn=collate_custom)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create detector model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading pretrained weights:\n",
      "-> loading conv. body weights\n",
      "-> loading output head weights\n",
      "-> loading rpn head weights\n",
      "-> loading mask head weights\n",
      "-> loading 1up4convs mask head weights\n",
      "-> loading FPN lateral weights\n",
      "-> loading two layer mlp conv head...\n"
     ]
    }
   ],
   "source": [
    "model = detector(arch=arch,\n",
    "                 detector_pkl_file=pretrained_model_file,\n",
    "                 conv_body_layers=['conv1','bn1','relu','maxpool','layer1','layer2','layer3','layer4'],\n",
    "                 conv_head_layers='two_layer_mlp',\n",
    "                 fpn_layers=['layer1','layer2','layer3','layer4'],\n",
    "                 fpn_extra_lvl=True,\n",
    "                 roi_height=7,\n",
    "                 roi_width=7,\n",
    "                 roi_spatial_scale=[0.25,0.125,0.0625,0.03125],\n",
    "                 roi_sampling_ratio=2,\n",
    "                 use_rpn_head = True,\n",
    "                 use_mask_head = True,\n",
    "                 mask_head_type = '1up4convs')\n",
    "model = model.cuda()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create data structure to store results\n",
    "all_boxes, all_segms, all_keyps = result_utils.empty_results(dataset.num_classes, len(dataset)) \n",
    "# (only all_boxes will be used for fast RCNN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1/5000\n",
      "101/5000\n",
      "201/5000\n",
      "301/5000\n",
      "401/5000\n",
      "501/5000\n",
      "601/5000\n",
      "701/5000\n",
      "801/5000\n",
      "901/5000\n",
      "1001/5000\n",
      "1101/5000\n",
      "1201/5000\n",
      "1301/5000\n",
      "1401/5000\n",
      "1501/5000\n",
      "1601/5000\n",
      "1701/5000\n",
      "1801/5000\n",
      "1901/5000\n",
      "2001/5000\n",
      "2101/5000\n",
      "2201/5000\n",
      "2301/5000\n",
      "2401/5000\n",
      "2501/5000\n",
      "2601/5000\n",
      "2701/5000\n",
      "2801/5000\n",
      "2901/5000\n",
      "3001/5000\n",
      "3101/5000\n",
      "3201/5000\n",
      "3301/5000\n",
      "3401/5000\n",
      "3501/5000\n",
      "3601/5000\n",
      "3701/5000\n",
      "3801/5000\n",
      "3901/5000\n",
      "4001/5000\n",
      "4101/5000\n",
      "4201/5000\n",
      "4301/5000\n",
      "4401/5000\n",
      "4501/5000\n",
      "4601/5000\n",
      "4701/5000\n",
      "4801/5000\n",
      "4901/5000\n",
      "Done!\n"
     ]
    }
   ],
   "source": [
    "# Compute detections for whole dataset\n",
    "for i, batch in enumerate(dataloader):\n",
    "    batch = to_cuda_variable(batch)\n",
    "    # forward pass\n",
    "    if torch_ver==\"0.4\": # handle change in \"volatile\"\n",
    "        with torch.no_grad():\n",
    "            class_scores,bbox_deltas,rois,img_features=model(batch['image'],\n",
    "                                                             scaling_factor=batch['scaling_factors'])   \n",
    "    else:\n",
    "        class_scores,bbox_deltas,rois,img_features=model(batch['image'],\n",
    "                                                             scaling_factor=batch['scaling_factors'])   \n",
    "    # postprocess output:\n",
    "    # - convert coordinates back to original image size, \n",
    "    # - treshold proposals based on score,\n",
    "    # - do NMS.\n",
    "    scores_final, boxes_final, boxes_per_class = result_utils.postprocess_output(rois,\n",
    "                                                                    batch['scaling_factors'],\n",
    "                                                                    batch['original_im_size'],\n",
    "                                                                    class_scores,\n",
    "                                                                    bbox_deltas)\n",
    "    if len(boxes_final)==0:\n",
    "        continue\n",
    "        \n",
    "    # compute masks\n",
    "    boxes_final_multiscale = add_multilevel_rois_for_test({'rois': boxes_final*batch['scaling_factors']},'rois')\n",
    "    boxes_final_multiscale_th = []\n",
    "    for k in boxes_final_multiscale.keys():\n",
    "        if len(boxes_final_multiscale[k])>0 and 'rois_fpn' in k:\n",
    "            boxes_final_multiscale_th.append(Variable(torch.cuda.FloatTensor(boxes_final_multiscale[k])))\n",
    "        elif len(boxes_final_multiscale[k])==0 and 'rois_fpn' in k:\n",
    "            boxes_final_multiscale_th.append(None)\n",
    "    rois_idx_restore_th = Variable(torch.cuda.FloatTensor(boxes_final_multiscale['rois_idx_restore_int32']))\n",
    "    masks=model.mask_head(img_features,boxes_final_multiscale_th,rois_idx_restore_th.long())\n",
    "    # postprocess mask output:\n",
    "    h_orig = int(batch['original_im_size'].squeeze()[0].data.cpu().numpy().item())\n",
    "    w_orig = int(batch['original_im_size'].squeeze()[1].data.cpu().numpy().item())\n",
    "    cls_segms = result_utils.segm_results(boxes_per_class, masks.cpu().data.numpy(), boxes_final, h_orig, w_orig,\n",
    "                                          M=28) # M: Mask R-CNN resolution\n",
    "    \n",
    "    # store results\n",
    "    result_utils.extend_results(i, all_boxes, boxes_per_class)\n",
    "    result_utils.extend_results(i, all_segms, cls_segms)\n",
    "    \n",
    "    if i%100==0:\n",
    "        print(\"{}/{}\".format(i+1,len(dataset)))\n",
    "\n",
    "print('Done!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# # Save detection and segmentation results\n",
    "np.save('files/results/all_boxes_segms_mask.npy',{'all_boxes': all_boxes, 'all_segms': all_segms})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading and preparing results...\n",
      "DONE (t=1.47s)\n",
      "creating index...\n",
      "index created!\n",
      "Running per image evaluation...\n",
      "Evaluate annotation type *bbox*\n",
      "DONE (t=36.72s).\n",
      "Accumulating evaluation results...\n",
      "DONE (t=5.45s).\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.409\n",
      " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.619\n",
      " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.448\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.235\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.442\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.539\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.329\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.510\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.533\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.335\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.571\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.676\n"
     ]
    }
   ],
   "source": [
    "# Compute evaluation metrics\n",
    "coco_box_eval = evaluate_boxes(json_dataset=dataset.coco, \n",
    "                           all_boxes=all_boxes, \n",
    "                           output_dir='files/results/',\n",
    "                           use_salt=False, cleanup=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading and preparing results...\n",
      "DONE (t=2.87s)\n",
      "creating index...\n",
      "index created!\n",
      "Running per image evaluation...\n",
      "Evaluate annotation type *segm*\n",
      "DONE (t=40.29s).\n",
      "Accumulating evaluation results...\n",
      "DONE (t=5.37s).\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.364\n",
      " Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.585\n",
      " Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.387\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.166\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.392\n",
      " Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.540\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.303\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.459\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.478\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.266\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.520\n",
      " Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.642\n"
     ]
    }
   ],
   "source": [
    "coco_segm_eval = evaluate_masks(json_dataset=dataset.coco, \n",
    "                           all_boxes=all_boxes,\n",
    "                           all_segms=all_segms,\n",
    "                           output_dir='files/results/',\n",
    "                           use_salt=False, cleanup=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (detectorch0.3)",
   "language": "python",
   "name": "detectorch03"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
